
***********************************************************************************************;
*   Code Name: Part4RegionalImmigrantRatio.sas
  	Authors: Per Siden and Thomas Jansson (Sveriges Riksbank)
	Created: February 8, 2013
	Last edited: April 13, 2015
***********************************************************************************************;

* This is the fourth step in a series of programs used in this project.
  The programs are intended to be run in the following order:
	1. Part1InitializeLinda.sas
	2. Part2IncludeAssets.sas
	3. Part3CreateHHVariables.sas
	4. Part4RegionalImmigrantRatio.sas

* In this file we create the variable reg_immi_rat (the regional immigrant ratio), which
  gives the proportion of immigrants living in the area closest to each individual. The
  area is the "församling/parish/region" when the parish has at least 20 sampled persons
  (popcountregion >= 20) in the lindaf sample, otherwise it is the "kommun/municipality"
  level. We also construct variables for lan (county), muni, and region.
***********************************************************************************************;

/* Start from a clean interactive session: clear the Log and Output windows. */
DM 'CLEAR LOG';
DM 'CLEAR OUTPUT';

/* Delete every member of the WORK library. PROC DATASETS is an interactive
   procedure, so it is ended explicitly with QUIT instead of relying on the
   next step boundary to close it. */
Proc datasets LIB = work KILL;
quit;

***********************************************************************************************;
/*---------------------------------------------------------------------------------------------
  m3ba_addratio - private helper for m3ba.

  Attaches the regional immigrant ratio of one year to a yearly person file and derives
  the county variables. The dataset is updated in place.

  Parameters:
    ds = dataset to update (e.g. main.lindaf1999). Must contain region and id.
    y  = year. Selects temp.immiratbyregion&y and its column immi_rat&y.

  Result: &ds gains reg_immi_rat, muni (from the ratio table), lan and
          lan_group1-lan_group11, and is left sorted by id.
---------------------------------------------------------------------------------------------*/
%macro m3ba_addratio(ds, y);

proc sort data = &ds;
	by region;
run;

data &ds;
	merge &ds(in = left) temp.immiratbyregion&y;
	by region;
	/* Keep only rows that come from the person file. Without this filter, regions that
	   exist only in the ratio table would be appended as rows with a missing id. */
	if left;
	reg_immi_rat = immi_rat&y;
	/* Regions without a computed ratio are given a ratio of zero. */
	if missing(reg_immi_rat) then reg_immi_rat = 0;
	label reg_immi_rat = 'Regional immigrant ratio';
	label muni = 'Municipality';
	/* The first two characters of the region code are used as the county (lan) code. */
	lan = substr(region,1,2);
	/* NOTE(review): the label text below looks mis-encoded (presumably the Swedish word
	   for county) - confirm the file encoding before changing it. */
	label lan = 'L�n';
	/* Regional groups (counties): one 0/1 indicator per group. */
	lan_group1 = (lan in ('01'));				/* Stockholm */
	lan_group2 = (lan in ('03','04'));			/* Uppsala, Södermanland */
	lan_group3 = (lan in ('05','06'));			/* Östergötland, Jönköping */
	lan_group4 = (lan in ('07','13'));			/* Kronoberg, Halland */
	lan_group5 = (lan in ('08','09','10'));		/* Kalmar, Gotland, Blekinge */
	lan_group6 = (lan in ('12'));				/* Skåne */
	lan_group7 = (lan in ('14'));				/* Västra Götaland */
	lan_group8 = (lan in ('17','18','19'));		/* Värmland, Örebro, Västmanland */
	lan_group9 = (lan in ('20','21'));			/* Dalarna, Gävleborg */
	lan_group10 = (lan in ('22','23'));			/* Västernorrland, Jämtland */
	lan_group11 = (lan in ('24','25'));			/* Västerbotten, Norrbotten */
	drop immi_rat&y;
run;

/* Restore the id order of the person file. */
proc sort data = &ds;
	by id;
run;

%mend m3ba_addratio;

/*---------------------------------------------------------------------------------------------
  m3ba - regional immigrant ratio per year.

  For every year from startyear to endyear the macro
    1. extracts the sampled persons of main.lindaf&y (sampled_f = 1) and of
       main.lindai&y (sampled_i = 1) and derives muni = first four characters of region,
    2. counts the persons in the lindaf sample whose birthcountry_code is not 'SE'
       (macro variable immipopf) and the persons in the lindai sample (immipopi),
    3. counts persons per region and per muni in both samples,
    4. computes immi_rat&y per region: the lindai count divided by the lindaf count,
       multiplied by immipopf/immipopi. The region-level counts are used when the region
       has at least 20 persons in the lindaf sample, otherwise the muni-level counts,
    5. merges the ratio onto main.lindaf&y and main.lindai&y (see m3ba_addratio).
  Finally the yearly ratio tables are merged by region into keys.immiregion_allyears.

  Parameters (keyword, optional - a plain %m3ba call runs 1999 to 2007 as before):
    startyear = first year to process (default 1999)
    endyear   = last year to process  (default 2007)

  Requires the librefs main, temp and keys to be assigned.
  NOTE(review): the factor immipopf/immipopi presumably rescales the lindai sample counts
  to the lindaf sample - confirm against the project documentation.
---------------------------------------------------------------------------------------------*/
%macro m3ba(startyear = 1999, endyear = 2007);
%do y = &startyear %to &endyear;

/* Sampled persons of the lindaf file, with the municipality code. */
data temp.lindaf;
	set main.lindaf&y(keep = id sampled_f region birthcountry_code);
	if sampled_f = 1;
	muni = substr(region,1,4);
run;

/* Number of persons in the lindaf sample not born in Sweden.
   NOTE(review): a missing birthcountry_code is also counted as not 'SE'. */
data _NULL_;
	set temp.lindaf end = last;
	if birthcountry_code ^= 'SE' then immipopf + 1;
	if last then call symputx('immipopf', immipopf);
run;

/* Sampled persons of the lindai file, with the municipality code. */
data temp.lindai;
	set main.lindai&y(keep = id sampled_i region birthcountry_code);
	if sampled_i = 1;
	muni = substr(region,1,4);
run;

/* Size of the lindai sample. */
data _NULL_;
	set temp.lindai end = last;
	if last then call symputx('immipopi', _N_);
run;

/* Person counts per region and per municipality in both samples. */
proc freq data = temp.lindaf;
	tables region / noprint out = temp.popbyregion;
run;

proc freq data = temp.lindai;
	tables region / noprint out = temp.immibyregion;
run;

proc freq data = temp.lindaf;
	tables muni / noprint out = temp.popbymuni;
run;

proc freq data = temp.lindai;
	tables muni / noprint out = temp.immibymuni;
run;

/* Add the municipality code to the region-level counts so that the muni-level counts
   can be merged on afterwards. */
data temp.popbyregion;
	set temp.popbyregion;
	muni = substr(region,1,4);
run;

data temp.immibyregion;
	set temp.immibyregion;
	muni = substr(region,1,4);
run;

/* One row per region with the counts of both samples. */
data temp.byregion;
	merge temp.popbyregion(keep = muni region count rename=(count = popcountregion))
	      temp.immibyregion(keep = muni region count rename=(count = immicountregion));
	by region;
run;

/* Add the muni-level counts. temp.byregion is ordered by region, and muni is the leading
   part of region, so the rows are also in muni order for this merge. */
data temp.immiratbyregion;
	merge temp.byregion
	      temp.popbymuni(in = left keep = muni count rename=(count = popcountmuni))
	      temp.immibymuni(in = right keep = muni count rename=(count = immicountmuni));
	by muni;
	if left or right;
run;

/* Ratio per region. A missing popcountregion compares as lower than 20, so such regions
   fall back to the muni-level counts. */
data temp.immiratbyregion&y;
	set temp.immiratbyregion;
	if popcountregion >= 20 then immi_rat&y = (immicountregion/popcountregion)*(&immipopf/&immipopi);
	else immi_rat&y = (immicountmuni/popcountmuni)*(&immipopf/&immipopi);
	keep region muni immi_rat&y;
run;

/* Attach the ratio and the county variables to both yearly person files. */
%m3ba_addratio(main.lindaf&y, &y);
%m3ba_addratio(main.lindai&y, &y);

%end;

/* Merge and save for all years: one row per region, one immi_rat column per year. */
data keys.immiregion_allyears;
	merge
	%do y = &startyear %to &endyear;
		temp.immiratbyregion&y
	%end;
	;
	by region;
run;

%mend m3ba;
%m3ba;
***********************************************************************************************;

